Code
library(mapview)
library(sf)
library(tidyverse)
library(tmap)
library(tbeptools)
library(khroma)
library(corrplot)We don’t even really know the best way to even start looking at these relationships, so this portion will contain some exploratory visualizations.
So to start, I’m just going to pull out a few sites where I would expect to see some influence - sites that aren’t in the bays, but are maybe around the edges. I’ll hand-pick and hard-code, to figure out what this all even looks like, then figure out how to scale up.
We have monthly precipitation data and hydrologic load data.
library(mapview)
library(sf)
library(tidyverse)
library(tmap)
library(tbeptools)
library(khroma)
library(corrplot)Read in shapefiles and other data files here.
swfwmdpixels <- read_sf(here::here("GIS", "swfwmd_pixel_2_utm_m_83.shp"))
bay_segs <- read_sf(here::here("GIS", "TBEP-Bay_Segments.shp")) |>
st_transform(crs = st_crs(swfwmdpixels))
hydro <- readxl::read_xlsx(here::here("hydro-load-data",
"monthly-hydrology.xlsx"))
hydro <- hydro |>
rename(hydro_load = hy_load_106_m3_mo) |>
group_by(bay_segment) |>
mutate(lag1_hydro = lag(hydro_load)) |>
ungroup()
hydro_sub <- hydro |>
dplyr::filter(year >= 2009)Subset pixel file, merge datasets that need merging, turn into spatial objects.
precip_pixels <- st_intersection(swfwmdpixels, bay_segs)
# pixels to pull rainfall from,
# and their associated bay segments
# (for group-wise averaging)
pixs <- precip_pixels |>
st_drop_geometry() |>
select(PIXEL, BAY_SEG)
# read in compiled rainfall (see R/rainfall_preprocessing.R)
prcp <- readRDS(here::here("SWFWMD-rainfall-data", "compiledRainfall.rds"))
prcp2 <- prcp |>
filter(PIXEL %in% unique(pixs$PIXEL))
# okaaaay, guess I managed to only pull Tampa Bay data anyway. Good check though.
rm(prcp2)
daily_prcp <- readRDS(here::here("SWFWMD-rainfall-data", "Daily_Data", "compiledDailyRainfall.rds"))Which precip to look at? Total for watershed? total within X kilometers of a station? (and what should X be?) Maybe one task is to figure out what’s best. Could vary by watershed too though - Hillsborough Bay’s is way bigger than Middle Tampa Bay’s…. maybe a bigger time lag in situations like that (but still, if we’re only looking at monthly….)
# pull a few months and map both by pixel,
# and averaged up to bay segment
# make sure it works
# there are some pixels along bay segment boundaries
# I set this to be a full join so that the precip from
# those pixels goes into each bay segment average
# that they're part of
prcp <- prcp |>
full_join(pixs, by = "PIXEL",
relationship = "many-to-many") |>
group_by(PIXEL) |>
mutate(prcp_lag1 = lag(inches)) |>
ungroup()
daily_prcp <- daily_prcp |>
full_join(pixs, by = "PIXEL",
relationship = "many-to-many") |>
group_by(PIXEL) |>
mutate(daily_lag1 = lag(inches, 1),
daily_lag2 = lag(inches, 2),
daily_lag3 = lag(inches, 3),
daily_lag4 = lag(inches, 4),
daily_lag5 = lag(inches, 5),
daily_lag6 = lag(inches, 6),
daily_24hrs = inches + daily_lag1,
daily_48hrs = inches + daily_lag1 + daily_lag2,
daily_7d = inches + daily_lag1 + daily_lag2 + daily_lag3 + daily_lag4 + daily_lag5 + daily_lag6) |>
ungroup() |>
select(PIXEL, date, inches, BAY_SEG,
daily_24hrs, daily_48hrs, daily_7d)
prcp_bayseg <- prcp |>
summarize(.by = c(BAY_SEG, month, year),
inches = mean(inches, na.rm = TRUE)) |>
filter(year == 2022) |>
left_join(bay_segs) |>
st_as_sf()
prcp_daily_bayseg <- daily_prcp |>
summarize(.by = c(BAY_SEG, date),
across(c(inches, daily_24hrs, daily_48hrs, daily_7d),
function(x) mean(x, na.rm = TRUE)))
prcp_pixel <- prcp |>
select(-BAY_SEG) |>
distinct() |>
filter(year == 2022) |>
left_join(select(precip_pixels, PIXEL, geometry)) |>
st_as_sf()prcp_bayseg |>
filter(month == 1) |>
mapview(zcol = "inches",
layer.name = "Jan 2022")prcp_pixel |>
filter(month == 1) |>
mapview(zcol = "inches",
layer.name = "Jan 2022")tm_shape(prcp_bayseg) +
tm_borders() +
tm_fill("inches",
n = 6,
palette = "YlGnBu") +
tm_facets(by = "month", nrow = 4)
tm_shape(prcp_pixel) +
tm_fill("inches",
n = 6,
palette = "YlGnBu") +
tm_facets(by = "month", nrow = 4)
Variations throughout a year are so large that my worry about averaging up to bay segment being too rough is…. not really relevant after all. Should be fine.
prcp_bayseg <- prcp |>
summarize(.by = c(BAY_SEG, month, year),
inches = mean(inches, na.rm = TRUE)) |>
filter(year == 2022) |>
left_join(bay_segs) |>
st_as_sf()
tm_shape(prcp_bayseg) +
tm_borders() +
tm_fill("inches",
palette = "YlGnBu",
n = 10) +
tm_facets(by = c("month", "year"))
Bay segment level should be fine.
I’d expect these to correlate pretty well. Wonder if that changes by month and/or wet vs. dry season. Check that out here.
Subset to only the specific bay segments in both datasets (am unsure whether to average precip across different bay segments, which may be different sizes….. probably should do that at the pixel-to-bay-seg level if I want to)
combnd <- prcp |>
mutate(bay_segment = case_match(BAY_SEG,
"Boca Ciega Bay" ~ "Remainder Lower Tampa Bay",
"Manatee River" ~ "Remainder Lower Tampa Bay",
"Terra Ceia Bay" ~ "Remainder Lower Tampa Bay",
.default = BAY_SEG)) |>
summarize(.by = c(bay_segment, month, year),
inches = mean(inches, na.rm = TRUE)) |>
group_by(bay_segment) |>
mutate(lag1_prcp = lag(inches)) |>
ungroup() |>
filter(year >= 2009,
bay_segment %in% unique(hydro$bay_segment)) |>
left_join(hydro, by = c("bay_segment", "year", "month"))May have log-log relationships, or non-linear relationships, or need to look at it some different way. Let’s see.
p <- ggplot(combnd) +
geom_point(aes(x = inches, y = hydro_load,
col = bay_segment))
p
Generally positive relationship (as you’d expect) but lots of spread, especially at higher precipitation levels.
Looks like a lot of that variability is due to the different bay segments.
p +
facet_wrap(~bay_segment)
Hillsborough Bay has a lot of variability - not surprising since it’s the biggest of the watersheds. Looks like the relationships hold until about 12 inches, then higher points are both infrequent and possibly show some leveling off. This includes ~15 years of data (2009 and later) and looks almost exactly the same as when I used only 2016 and above.
Quick look at hydrologic load and the previous month’s precip:
ggplot(combnd) +
geom_point(aes(x = lag1_prcp, y = hydro_load,
col = bay_segment)) +
facet_wrap(~bay_segment) +
theme(legend.position = "none") +
labs(x = "previous month's precipitation (inches)",
y = "hydrologic load")
Looks pretty similar, though with more variation; not entirely unexpected at this aggregation level.
ggplot(combnd) +
geom_point(aes(x = inches, y = hydro_load,
col = factor(month))) +
scale_color_brewer(palette = "Paired") +
facet_wrap(~bay_segment)
Makes sense that the relationship would vary by bay segment/watershed size. In Hillsborough Bay, we sort of see Aug-October on the high side of the hydrologic loads, even for the same amount of precipitation as in (e.g.) April.
ggplot(combnd) +
geom_point(aes(x = inches, y = hydro_load,
col = bay_segment)) +
scale_color_brewer(palette = "Set1") +
facet_wrap(~month)
Varies a lot by month and bay segment, especially in September.
ggplot(combnd) +
geom_boxplot(aes(x = factor(month), y = inches,
col = bay_segment)) +
scale_color_brewer(palette = "Set1") +
labs(title = "Precip variability by month x bay segment",
x = "Month",
y = "Monthly Total Precipitation (inches)")
ggplot(combnd) +
geom_boxplot(aes(x = factor(month), y = hydro_load,
col = bay_segment)) +
scale_color_brewer(palette = "Set1") +
labs(title = "Hydro load variability by month x bay segment",
x = "Month",
y = "Monthly hydrologic load (10^6 m3)")
Precip is similar between watersheds; hydrologic load is highest in Hillsborough Bay and lowest in lower Tampa Bay. Middle and Old Tampa Bay are pretty comparable.
fib_dat <- readRDS(here::here("fib-data", "tbeptools_importwqp.rds"))
fib_sub <- fib_dat |>
filter(yr >= 2009,
class == "Estuary",
var == "ecocci")
# get those attached to bay segments
fib_baysegs <- fib_sub |>
st_as_sf(coords = c("Longitude", "Latitude"),
crs = "WGS84",
remove = FALSE) |> # WGS84 is default for handheld GPS units
st_transform(crs = st_crs(swfwmdpixels)) |>
st_intersection(bay_segs) |>
mutate(bay_segment = case_match(BAY_SEG,
"Boca Ciega Bay" ~ "Remainder Lower Tampa Bay",
"Manatee River" ~ "Remainder Lower Tampa Bay",
.default = BAY_SEG))
fib_hydro <- left_join(fib_baysegs, combnd,
by = c("yr" = "year",
"mo" = "month",
"bay_segment"))fib_daily_prcp <- fib_baysegs |>
mutate(date = as.Date(SampleTime)) |>
left_join(prcp_daily_bayseg,
by = c("date", "BAY_SEG"))ggplot(fib_hydro, aes(x = hydro_load,
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. hydrologic load; log-log scale",
x = "Hydrologic Load",
y = "Enterococcus",
col = "Month")
Interesting….. it’s not necessarily the monthly hydrologic loads that matter (though it’s clear that the higher hydrologic loads occur in certain months). Could come down to precipitation in some previous time periods.
Could also be that because the majority of stations are in the bay proper, they’re diluted and that’s driving the lack-of-relationship. Really need to break this all out by “in-bay” vs. “have some land near the station”.
ggplot(fib_hydro, aes(x = lag1_hydro,
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. lagged hydrologic load; log-log scale",
x = "Hydrologic Load in previous month",
y = "Enterococcus",
col = "Month")
Nothing new.
First a little more exploration of entero concentrations by time of year.
ggplot(fib_hydro, aes(x = inches,
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 2,
alpha = 0.3) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. Monthly Precip",
x = "Precipitation (in)",
y = "Enterococcus",
col = "Month")
We see it a bit more with precip rather than hydrologic load, at least in the higher values.
ggplot(fib_hydro, aes(x = lag1_prcp,
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 2,
alpha = 0.3) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. Lagged Precip",
x = "Precipitation (in) in previous month",
y = "Enterococcus",
col = "Month")
Yeah, not that different.
ggplot(fib_hydro,
aes(x = factor(mo),
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 0.7,
alpha = 0.4) +
geom_boxplot(alpha = 0) +
scale_y_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero counts by month",
x = "Month",
y = "Enterococcus") +
theme(legend.position = "none")
ggplot(hydro_sub,
aes(x = factor(month),
y = hydro_load)) +
geom_jitter(aes(col = factor(month)),
size = 0.7,
alpha = 0.6) +
geom_boxplot(alpha = 0) +
scale_y_log10() +
facet_wrap(~bay_segment) +
labs(title = "Hydrologic Load by month",
x = "Month",
y = "Hydrologic Load") +
theme(legend.position = "none")
ggplot(combnd,
aes(x = factor(month),
y = inches)) +
geom_jitter(aes(col = factor(month)),
size = 0.7,
alpha = 0.6) +
geom_boxplot(alpha = 0) +
scale_y_log10() +
facet_wrap(~bay_segment) +
labs(title = "Precip by month",
x = "Month",
y = "Monthly Precipitation (inches)") +
theme(legend.position = "none")
tbseg2 <- tbseg |>
st_transform(crs = st_crs(swfwmdpixels))
fib_nonbay <- st_filter(fib_hydro, st_union(tbseg2), .predicate = st_disjoint)Note, this isn’t good spatial coverage - may want to think about including freshwater sites too (eek).
mapview(fib_nonbay)ggplot(fib_nonbay,
aes(x = factor(mo),
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 0.7,
alpha = 0.4) +
geom_boxplot(alpha = 0) +
scale_y_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero counts by month",
subtitle = "stations that are not in the bay proper",
x = "Month",
y = "Enterococcus") +
theme(legend.position = "none")
Still really not any cycling…. surprising. Maybe a little in Hillsborough Bay.
ggplot(fib_nonbay, aes(x = hydro_load,
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
# scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. hydrologic load",
subtitle = "stations that are not in the bay proper",
x = "Hydrologic Load",
y = "Enterococcus",
col = "Month")
ggplot(fib_nonbay, aes(x = hydro_load,
y = val)) +
geom_jitter(aes(col = bay_segment),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
# facet_wrap(~bay_segment) +
labs(title = "Entero vs. hydrologic load; log-log scale",
subtitle = "stations that are not in the bay proper",
x = "Hydrologic Load",
y = "Enterococcus",
col = "Month")
ggplot(fib_nonbay, aes(x = inches,
y = val)) +
geom_jitter(aes(col = bay_segment),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
# scale_x_log10() +
# facet_wrap(~bay_segment) +
labs(title = "Entero vs. precip",
subtitle = "stations that are not in the bay proper",
x = "Precipitation (in)",
y = "Enterococcus",
col = "Month")
ggplot(fib_nonbay, aes(x = inches,
y = val)) +
geom_jitter(aes(col = bay_segment),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
# scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. precip",
subtitle = "stations that are not in the bay proper",
x = "Precipitation (in)",
y = "Enterococcus",
col = "Month")
ggplot(fib_nonbay, aes(x = inches,
y = val)) +
geom_jitter(aes(col = bay_segment),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. precip; log-log scale",
subtitle = "stations that are not in the bay proper",
x = "Precipitation (in)",
y = "Enterococcus",
col = "Month")
Changing the x-axis between log and ‘regular’ really changes how it looks.
ggplot(fib_nonbay, aes(x = lag1_prcp,
y = val)) +
geom_jitter(aes(col = bay_segment),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
# scale_x_log10() +
# facet_wrap(~bay_segment) +
labs(title = "Entero vs. lagged precip",
subtitle = "stations that are not in the bay proper",
x = "Previous Month's Precipitation (in)",
y = "Enterococcus",
col = "Month")
ggplot(fib_nonbay, aes(x = lag1_prcp,
y = val)) +
geom_jitter(aes(col = as.factor(mo)),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
# scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. lagged precip",
subtitle = "stations that are not in the bay proper",
x = "Previous Month's Precipitation (in)",
y = "Enterococcus",
col = "Month")
ggplot(fib_nonbay, aes(x = lag1_prcp,
y = val)) +
geom_jitter(aes(col = factor(mo)),
size = 2,
alpha = 0.5) +
geom_smooth() +
geom_rug(col = "gray40") +
scale_y_log10() +
scale_x_log10() +
facet_wrap(~bay_segment) +
labs(title = "Entero vs. lagged precip; log-log scale",
subtitle = "stations that are not in the bay proper",
x = "Precipitation (in)",
y = "Enterococcus",
col = "Month")
OrgID 21FLHILL_WQX Stations 22, 136, 036, 054
fib_tiny <- fib_hydro |>
dplyr::filter(OrgID == "21FLHILL_WQX",
Station %in% c("22", "136", "036", "054"))
mapview(fib_tiny)fib_long <- fib_tiny |>
mutate(Date = lubridate::ymd(paste(yr, mo, "01"))) |>
rename(Entero = val) |>
select(OrgID, Station, Date, Entero, bay_segment, inches, hydro_load, lag1_prcp, lag1_hydro, geometry) |>
pivot_longer(c(Entero, inches, hydro_load, lag1_prcp, lag1_hydro),
names_to = "variable",
values_to = "value")ggplot(fib_long, aes(x = Date, y = value, col = Station)) +
geom_point() +
geom_line() +
scale_y_log10() +
facet_wrap(~variable, scales = "free_y",
ncol = 1)
Well well well, we have at least one disappearing station, and it’s one where enterococcus was generally high.
ggplot(fib_tiny, aes(x = inches, y = val, col = Station, group = Station)) +
geom_point(size = 2, alpha = 0.7) +
scale_y_log10() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Precip",
y = "Entero")
And really it looks like values within a station are sort of within their own range, different than other stations, as opposed to really correlating with precip (at least these few stations, on this graph). Maybe a bit of a relationship if we force it to linear rather than loess.
ggplot(fib_tiny, aes(x = lag1_prcp, y = val, col = Station, group = Station)) +
geom_point(size = 2, alpha = 0.7) +
scale_y_log10() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Lagged Precip",
y = "Entero")
A little different, but not much.
ggplot(fib_tiny, aes(x = hydro_load, y = val, col = Station, group = Station)) +
geom_point(size = 2, alpha = 0.7) +
scale_y_log10() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Hydro load",
y = "Entero")
fib_tiny <- fib_hydro[c(1, 100, 400, 600, 2000, 5000, 8000, 10000), ]
mapview(fib_tiny)
nearBorder <- rowSums(st_is_within_distance(fib_tiny, tbseg2, dist = 10, sparse = FALSE))
fib_within_x <- fib_tiny[nearBorder > 0, ]
mapview(fib_within100) + mapview(tbseg2)Incorporate day-of-sampling, day + day before (24 hrs), day + 2 days before (48 hrs), and day + week before (7-day)
Working with fib_daily_prcp data frame.
fib_daily_nonbay <- st_filter(fib_daily_prcp, st_union(tbseg2), .predicate = st_disjoint)
fib_daily_long <- fib_daily_prcp |>
pivot_longer(inches:daily_7d,
names_to = "time_period",
values_to = "total") |>
mutate(time_period = case_match(time_period,
"inches" ~ "day of sample",
"daily_24hrs" ~ "sample + previous day",
"daily_48hrs" ~ "sample + 2 days",
"daily_7d" ~ "previous week"))
fib_daily_long_nonbay <- fib_daily_long |>
filter(Station %in% fib_daily_nonbay$Station)ggplot(fib_daily_long, aes(x = total, y = val)) +
geom_point(aes(col = time_period),
alpha = 0.4) +
facet_wrap(~BAY_SEG) +
scale_x_log10() +
scale_y_log10() +
scale_color_okabeito() +
labs(title = "All stations",
subtitle = "note log-log scale",
x = "Precip (inches)",
y = "Enterococcus",
col = "time period of precip total")
ggplot(fib_daily_long, aes(x = total, y = val)) +
geom_point(aes(col = BAY_SEG),
alpha = 0.4) +
facet_wrap(~time_period) +
scale_x_log10() +
scale_y_log10() +
scale_color_okabeito() +
labs(title = "All stations",
subtitle = "note log-log scale",
x = "Precip (inches)",
y = "Enterococcus",
col = "Bay Segment")
ggplot(fib_daily_long_nonbay, aes(x = total, y = val)) +
geom_point(aes(col = time_period),
alpha = 0.4) +
facet_wrap(~BAY_SEG) +
scale_x_log10() +
scale_y_log10() +
scale_color_okabeito() +
labs(title = "Non-bay stations",
subtitle = "note log-log scale",
x = "Precip (inches)",
y = "Enterococcus",
col = "time period of precip total")
ggplot(fib_daily_long_nonbay, aes(x = total, y = val)) +
geom_point(aes(col = BAY_SEG),
alpha = 0.4) +
facet_wrap(~time_period) +
scale_x_log10() +
scale_y_log10() +
scale_color_okabeito() +
labs(title = "Non-bay stations",
subtitle = "note log-log scale",
x = "Precip (inches)",
y = "Enterococcus",
col = "Bay Segment")
Spearman correlations
all_stns <- fib_daily_prcp |>
st_drop_geometry() |>
select(val, inches:daily_7d) |>
set_names(c("Entero", "sample_day_precip",
"sample_and_24hrs",
"sample_and_48hrs",
"previous_week"))corr_all <- cor(all_stns,
use = "pairwise.complete.obs",
method = "spearman")
knitr::kable(corr_all,
caption = "Spearman correlation coefficients",
digits = 3)| Entero | sample_day_precip | sample_and_24hrs | sample_and_48hrs | previous_week | |
|---|---|---|---|---|---|
| Entero | 1.000 | 0.099 | 0.145 | 0.161 | 0.132 |
| sample_day_precip | 0.099 | 1.000 | 0.871 | 0.709 | 0.508 |
| sample_and_24hrs | 0.145 | 0.871 | 1.000 | 0.845 | 0.588 |
| sample_and_48hrs | 0.161 | 0.709 | 0.845 | 1.000 | 0.706 |
| previous_week | 0.132 | 0.508 | 0.588 | 0.706 | 1.000 |
knitr::kable(cor.mtest(corr_all)$p,
caption = "p-values of Spearman correlations",
digits = 3)| Entero | sample_day_precip | sample_and_24hrs | sample_and_48hrs | previous_week | |
|---|---|---|---|---|---|
| Entero | 0.000 | 0.057 | 0.041 | 0.035 | 0.109 |
| sample_day_precip | 0.057 | 0.000 | 0.013 | 0.120 | 0.521 |
| sample_and_24hrs | 0.041 | 0.013 | 0.000 | 0.038 | 0.400 |
| sample_and_48hrs | 0.035 | 0.120 | 0.038 | 0.000 | 0.180 |
| previous_week | 0.109 | 0.521 | 0.400 | 0.180 | 0.000 |
nonbay_stns <- fib_daily_nonbay |>
st_drop_geometry() |>
select(val, inches:daily_7d) |>
set_names(c("Entero", "sample_day_precip",
"sample_and_24hrs",
"sample_and_48hrs",
"previous_week"))corr_all <- cor(nonbay_stns,
use = "pairwise.complete.obs",
method = "spearman")
knitr::kable(corr_all,
caption = "Spearman correlation coefficients",
digits = 3)| Entero | sample_day_precip | sample_and_24hrs | sample_and_48hrs | previous_week | |
|---|---|---|---|---|---|
| Entero | 1.000 | 0.181 | 0.264 | 0.284 | 0.271 |
| sample_day_precip | 0.181 | 1.000 | 0.843 | 0.729 | 0.536 |
| sample_and_24hrs | 0.264 | 0.843 | 1.000 | 0.906 | 0.651 |
| sample_and_48hrs | 0.284 | 0.729 | 0.906 | 1.000 | 0.763 |
| previous_week | 0.271 | 0.536 | 0.651 | 0.763 | 1.000 |
knitr::kable(cor.mtest(corr_all)$p,
caption = "p-values of Spearman correlations",
digits = 3)| Entero | sample_day_precip | sample_and_24hrs | sample_and_48hrs | previous_week | |
|---|---|---|---|---|---|
| Entero | 0.000 | 0.045 | 0.042 | 0.050 | 0.175 |
| sample_day_precip | 0.045 | 0.000 | 0.035 | 0.147 | 0.606 |
| sample_and_24hrs | 0.042 | 0.035 | 0.000 | 0.020 | 0.395 |
| sample_and_48hrs | 0.050 | 0.147 | 0.020 | 0.000 | 0.178 |
| previous_week | 0.175 | 0.606 | 0.395 | 0.178 | 0.000 |
options(scipen = 999)
my_cor <- function(df, x){
cor(df[["val"]], df[[x]],
method = "spearman",
use = "pairwise.complete.obs")
}
my_cor.p <- function(df, x){
cor.test(df[["val"]], df[[x]],
method = "spearman",
use = "pairwise.complete.obs")$p.value
}
nonbay_stns2 <- fib_daily_nonbay |>
st_drop_geometry() |>
filter(BAY_SEG != "Manatee River") |> # no values, messign things up
select(BAY_SEG, val, inches:daily_7d) |>
summarize(.by = BAY_SEG,
cor.sampleday = my_cor(.data, "inches"), # this could all be more elegant
pval.sampleday = my_cor.p(.data, "inches"),
cor.24hrs = my_cor(.data, "daily_24hrs"),
pval.24hrs = my_cor.p(.data, "daily_24hrs"),
cor.48hrs = my_cor(.data, "daily_48hrs"),
pval.48hrs = my_cor.p(.data, "daily_48hrs"),
cor.7d = my_cor(.data, "daily_7d"),
pval.7d = my_cor.p(.data, "daily_7d"))
knitr::kable(nonbay_stns2,
caption = "Spearman Correlations and p-values for Entero~rainfall by bay segment",
digits = 3) | BAY_SEG | cor.sampleday | pval.sampleday | cor.24hrs | pval.24hrs | cor.48hrs | pval.48hrs | cor.7d | pval.7d |
|---|---|---|---|---|---|---|---|---|
| Boca Ciega Bay | 0.193 | 0.043 | 0.415 | 0.000 | 0.511 | 0.000 | 0.527 | 0.000 |
| Hillsborough Bay | 0.271 | 0.000 | 0.305 | 0.000 | 0.337 | 0.000 | 0.325 | 0.000 |
| Lower Tampa Bay | 0.040 | 0.651 | 0.031 | 0.728 | 0.078 | 0.371 | 0.088 | 0.314 |
| Middle Tampa Bay | 0.233 | 0.007 | 0.315 | 0.000 | 0.280 | 0.001 | 0.289 | 0.001 |
| Old Tampa Bay | 0.072 | 0.580 | 0.165 | 0.200 | 0.163 | 0.206 | 0.086 | 0.505 |